library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
# Load the timestamped article table from the local CSV export (read as UTF-8)
# and echo it so the notebook shows the raw input.
data <- read.csv(
  file = "C:/Users/kzorina/Studing/MMDS/FutureOfWikipedia/data/df_all_with_timestamps.csv",
  encoding = "UTF-8"
)
data
# Derive, for every row, the lag between the Ukrainian and the English
# timestamp and keep only the identifying columns plus the derived ones.
#   Ukr_time / Eng_time        : POSIXct parsed from the raw "b'YYYYmmddHHMMSS'"
#                                strings in uk_timestamp / en_timestamp
#   Time_to_translation_hours  : Eng_time - Ukr_time, in whole hours
#   Time_to_translation_days   : Eng_time - Ukr_time, in whole days
# Rows whose timestamps do not match the format get NA in the derived columns.
cleaned_data <- data %>%
  mutate(
    # Parse in a fixed zone. With the session's local zone, wall-clock times
    # that fall into a DST gap parse to NA and lags spanning a DST switch are
    # off by one hour.
    # NOTE(review): assumes the raw stamps are recorded in UTC -- confirm.
    Ukr_time = as.POSIXct(uk_timestamp, format = "b'%Y%m%d%H%M%S'", tz = "UTC"),
    Eng_time = as.POSIXct(en_timestamp, format = "b'%Y%m%d%H%M%S'", tz = "UTC"),
    # difftime() applies `units` directly; as.difftime() hands an existing
    # difftime back as-is, so its `units = "secs"` had no effect.
    Time_to_translation = difftime(Eng_time, Ukr_time, units = "secs"),
    # as.integer() truncates toward zero (e.g. 0.9 -> 0 and -0.9 -> 0).
    Time_to_translation_days = as.integer(
      as.numeric(Time_to_translation, units = "days")
    ),
    Time_to_translation_hours = as.integer(
      as.numeric(Time_to_translation, units = "hours")
    )
  ) %>%
  select(
    uk_id, uk_title, en_id, en_title,
    Ukr_time, Eng_time,
    Time_to_translation_hours, Time_to_translation_days
  )
cleaned_data
# Keep the rows whose whole-hour lag is positive, i.e. the English timestamp is
# at least one full hour after the Ukrainian one. Indexing with an NA condition
# produces all-NA rows; na.omit() drops those along with every other row that
# has an NA in any column.
ukr_early_eng <- cleaned_data[cleaned_data[["Time_to_translation_hours"]] > 0, ] %>%
  na.omit()
ukr_early_eng
# Collect the rows for which no day lag could be computed (NA in
# Time_to_translation_days) so they can be inspected separately.
na_DF <- cleaned_data[which(is.na(cleaned_data[["Time_to_translation_days"]])), ]
na_DF
library(pracma)
# Histogram of the whole-day lag for the rows in ukr_early_eng.
# The breaks are 70 equally spaced points running from half a day below the
# smallest lag to half a day above the largest one.
with(
  ukr_early_eng,
  hist(
    Time_to_translation_days,
    breaks = linspace(
      min(Time_to_translation_days) - 0.5,
      max(Time_to_translation_days) + 0.5,
      n = 70
    ),
    main = "Time before translation (days)",
    col = "#66A4A5",
    xlab = "Days from Ukr page creation to Eng page creation",
    ylab = "Amount of such pages"
  )
)

# Print the 70 histogram break points. Base seq(length.out = ) gives the same
# equally spaced sequence as pracma's linspace() and works even when pracma is
# not attached (the original call failed with: could not find function
# "linspace").
seq(
  min(ukr_early_eng$Time_to_translation_days) - 0.5,
  max(ukr_early_eng$Time_to_translation_days) + 0.5,
  length.out = 70
)
# Rows whose lag lies strictly between 739 and 807 days. This assignment sat
# right after the failing call in the notebook's embedded .Rmd source and was
# never executed; the plotting code further down needs `peak_data`.
peak_data <- ukr_early_eng[
  ukr_early_eng$Time_to_translation_days > 739 &
    ukr_early_eng$Time_to_translation_days < 807,
]
Error in linspace(min(ukr_early_eng$Time_to_translation_days) - 0.5, max(ukr_early_eng$Time_to_translation_days) +  : 
  could not find function "linspace"
peak_data
library(ggplot2)
library(plotly)
# Scatter plot of the peak rows: Ukrainian timestamp on x, whole-day lag on y,
# drawn as small red crosses (shape 4).
# `text` is not a ggplot2 aesthetic (ggplot2 reports it as unknown); it is
# presumably there to feed the hover label once the plot goes through
# ggplotly().
g <- ggplot() +
  geom_point(
    data = peak_data,
    mapping = aes(
      x = Ukr_time,
      y = Time_to_translation_days,
      # The page-title part of the label is disabled
      # (was: ,"page title:",eval(parse(uk_title))).
      text = paste(
        "time:", strftime(Ukr_time, format = "%Y-%m-%d %H:%M:%S"),
        "eng translation:", Eng_time
      )
    ),
    col = "red",
    shape = 4,
    size = 1
  ) +
  xlab("Creation date of Ukr page") +
  ylab("Days from Ukr page creation to Eng page creation")
Ignoring unknown aesthetics: text
# Convert the ggplot object into an interactive plotly widget.
p <- ggplotly(g)
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
# Auto-print the plotly object so the notebook renders the interactive plot.
p
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KYGBge3J9DQpsaWJyYXJ5KGRwbHlyKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCmRhdGEgPC0gcmVhZC5jc3YoIkM6L1VzZXJzL2t6b3JpbmEvU3R1ZGluZy9NTURTL0Z1dHVyZU9mV2lraXBlZGlhL2RhdGEvZGZfYWxsX3dpdGhfdGltZXN0YW1wcy5jc3YiLCBlbmNvZGluZyA9ICJVVEYtOCIpDQpkYXRhDQpgYGANCg0KYGBge3J9DQpjbGVhbmVkX2RhdGEgPC0gZGF0YSAlPiUgDQogIG11dGF0ZShVa3JfdGltZSA9IGFzLlBPU0lYY3QodWtfdGltZXN0YW1wLCBmb3JtYXQgPSAiYiclWSVtJWQlSCVNJVMnIiksDQogICAgICAgICBFbmdfdGltZSA9IGFzLlBPU0lYY3QoZW5fdGltZXN0YW1wLCBmb3JtYXQgPSAiYiclWSVtJWQlSCVNJVMnIikpICU+JSANCiAgbXV0YXRlKFRpbWVfdG9fdHJhbnNsYXRpb24gPSBhcy5kaWZmdGltZShFbmdfdGltZS1Va3JfdGltZSwgdW5pdHMgPSAic2VjcyIpKSAlPiUgDQogIG11dGF0ZShUaW1lX3RvX3RyYW5zbGF0aW9uX2RheXMgPSBhcy5pbnRlZ2VyKGFzLm51bWVyaWMoVGltZV90b190cmFuc2xhdGlvbiwgdW5pdHMgPSAiZGF5cyIpKSkgJT4lIA0KICBtdXRhdGUoVGltZV90b190cmFuc2xhdGlvbl9ob3VycyA9IGFzLmludGVnZXIoYXMubnVtZXJpYyhUaW1lX3RvX3RyYW5zbGF0aW9uLCB1bml0cyA9ICJob3VycyIpKSkgJT4lIA0KICBzZWxlY3QodWtfaWQsdWtfdGl0bGUsIGVuX2lkLCBlbl90aXRsZSwgVWtyX3RpbWUsIEVuZ190aW1lLCBUaW1lX3RvX3RyYW5zbGF0aW9uX2hvdXJzLCBUaW1lX3RvX3RyYW5zbGF0aW9uX2RheXMpDQpjbGVhbmVkX2RhdGENCmBgYA0KDQpgYGB7cn0NCnVrcl9lYXJseV9lbmcgPC0gbmEub21pdChjbGVhbmVkX2RhdGFbY2xlYW5lZF9kYXRhJFRpbWVfdG9fdHJhbnNsYXRpb25faG91cnMgPiAwLF0pDQp1a3JfZWFybHlfZW5nDQpgYGANCg0KDQpgYGB7cn0NCm5hX0RGIDwtIGNsZWFuZWRfZGF0YVtpcy5uYShjbGVhbmVkX2RhdGEkVGltZV90b190cmFuc2xhdGlvbl9kYXlzKSxdDQpuYV9ERg0KYGBgDQoNCg0KYGBge3J9DQpsaWJyYXJ5KHByYWNtYSkNCmhpc3QodWtyX2Vhcmx5X2VuZyRUaW1lX3RvX3RyYW5zbGF0aW9uX2RheXMsIA0KICAgICBicmVha3M9bGluc3BhY2UobWluKHVrcl9lYXJseV9lbmckVGltZV90b190cmFuc2xhdGlvbl9kYXlzKS0wLjUsIG1heCh1a3JfZWFybHlfZW5nJFRpbWVfdG9fdHJhbnNsYXRpb25fZGF5cykrMC41LCBuID0gNzApLA0KICAgICBtYWluID0gIlRpbWUgYmVmb3JlIHRyYW5zbGF0aW9uIChkYXlzKSIsDQogICAgIGNvbCA9ICIjNjZBNEE1IiwNCiAgICAgeGxhYiA9ICJEYXlzIGZyb20gVWtyIHBhZ2UgY3JlYXRpb24gdG8gRW5nIHBhZ2UgY3JlYXRpb24iLA0KICAgICB5bGFiID0gIkFtb3VudCBvZiBzdWNoIHBhZ2VzIikNCmBgYA0KDQoNCg0KYGBge3J9DQpsaW5zcGFjZShtaW4odWtyX2Vhcmx5X2VuZyRUaW1lX3RvX3RyYW5zbGF0aW9u
X2RheXMpLTAuNSwgbWF4KHVrcl9lYXJseV9lbmckVGltZV90b190cmFuc2xhdGlvbl9kYXlzKSswLjUsIG4gPSA3MCkNCnBlYWtfZGF0YSA8LSB1a3JfZWFybHlfZW5nW3Vrcl9lYXJseV9lbmckVGltZV90b190cmFuc2xhdGlvbl9kYXlzID4gNzM5ICYgdWtyX2Vhcmx5X2VuZyRUaW1lX3RvX3RyYW5zbGF0aW9uX2RheXMgPCA4MDcsXQ0KYGBgDQoNCg0KYGBge3J9DQpwZWFrX2RhdGENCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkocGxvdGx5KQ0KZyA8LSBnZ3Bsb3QoKSArDQogIGdlb21fcG9pbnQoZGF0YSA9IHBlYWtfZGF0YSwgDQogICAgICAgICAgICAgYWVzKHggPSBVa3JfdGltZSwgDQogICAgICAgICAgICAgICAgIHkgPSBUaW1lX3RvX3RyYW5zbGF0aW9uX2RheXMsIA0KICAgICAgICAgICAgICAgICB0ZXh0ID0gICAgICBwYXN0ZSgidGltZToiLCBzdHJmdGltZShVa3JfdGltZSwgZm9ybWF0PSIlWS0lbS0lZCAlSDolTTolUyIpLCANCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgImVuZyB0cmFuc2xhdGlvbjoiLEVuZ190aW1lDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICMsInBhZ2UgdGl0bGU6IixldmFsKHBhcnNlKHVrX3RpdGxlKSkNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKSksIGNvbCA9ICJyZWQiLCBzaGFwZSA9IDQsICBzaXplID0gMSkrDQogIGxhYnMoeCA9ICJDcmVhdGlvbiBkYXRlIG9mIFVrciBwYWdlIiwgeSA9ICJEYXlzIGZyb20gVWtyIHBhZ2UgY3JlYXRpb24gdG8gRW5nIHBhZ2UgY3JlYXRpb24iKQ0KDQpwID0gZ2dwbG90bHkoZyk7DQpwDQpgYGANCg0KYGBge3J9DQpzYXZlIDwtIHBlYWtfZGF0YVtwZWFrX2RhdGEkVWtyX3RpbWU+IjIwMTItMTAtMDQiICYgcGVha19kYXRhJFVrcl90aW1lPCIyMDEyLTEyLTMwIixdDQp3cml0ZS5jc3Yoc2F2ZSwiQzovVXNlcnMva3pvcmluYS9TdHVkaW5nL01NRFMvRnV0dXJlT2ZXaWtpcGVkaWEvZGF0YS9zYXZlLmNzdiIpDQpgYGANCg0K